In [28]:
import os
import pandas as pd
from sklearn import metrics
import plotly.express as px
from plotly.offline import init_notebook_mode
from typing import List, Dict, Callable, Iterable, Tuple

init_notebook_mode(connected=True)
In [5]:
# starting working directory of the kernel (the notebook's own folder)
os.getcwd()
Out[5]:
'/Users/cem/Documents/BHT_datascience/master_thesis/master-thesis/src/notebook'
In [6]:
# set working directory to root of project
# Move two levels up (src/notebook -> repo root) so that all project-relative
# paths used below resolve correctly.
print('Notebook path:', os.getcwd())
os.chdir(os.path.join(os.getcwd(), '..', '..'))
print('New working directory', os.getcwd())
Notebook path: /Users/cem/Documents/BHT_datascience/master_thesis/master-thesis/src/notebook
New working directory /Users/cem/Documents/BHT_datascience/master_thesis/master-thesis
In [7]:
# module imports
from src.Configurations import Configurations
In [8]:
# load all configurations
# (path is relative to the project root set by the chdir cell above;
# attribute names are defined by the Configurations class — not visible here)
cfg = Configurations('src/configuration.yaml')
In [9]:
# MEDIC metadata incl. model predictions; tab-separated file, path from config
df_medic = pd.read_csv(f'{cfg.output_path_medic_all_metadata_updated_with_predictions}', sep='\t')
# .count() excludes nulls — presumably image_id is never null, so this equals
# the number of rows; verify against the data
amount_of_sample = df_medic['image_id'].count()
amount_of_sample
Out[9]:
71198
In [18]:
# Column names of the four MEDIC classification tasks. Each task has a
# ground-truth column of this name plus a matching 'prediction <task>' column.
disaster_types_key = 'disaster_types'
informative_key = 'informative'
humanitarian_key = 'humanitarian'
damage_severity_key = 'damage_severity'
all_tasks = [disaster_types_key, informative_key, humanitarian_key, damage_severity_key]


def calculate_task_class_metrics_closure(tasks: List[str] = all_tasks) -> Callable:
    """
    Builds a function suitable for DataFrame.groupby().apply() that computes
    sklearn classification reports for the given tasks.

    :param tasks: task column names to report on; each needs a matching
        'prediction <task>' column in the grouped dataframe.
    :return: function mapping a group dataframe to one concatenated
        class-report dataframe (rounded to 2 decimals).
    """

    def calculate_task_class_metrics(group_df: pd.DataFrame) -> pd.DataFrame:
        """
        Use ONLY for the classification variables (tasks)!
        """

        def report_for(task: str) -> pd.DataFrame:
            # Class report of ground truth vs the model's prediction column;
            # zero_division=0 silences warnings for classes without predictions.
            report = metrics.classification_report(
                group_df[task],
                group_df[f'prediction {task}'],
                output_dict=True,
                zero_division=0,
            )
            # Dict-of-dicts -> dataframe; the metric names (precision, recall,
            # f1-score, support) move from the index into a 'metric' column.
            return (
                pd.DataFrame(report)
                .reset_index()
                .rename(columns={'index': 'metric'})
            )

        per_task_reports = [report_for(task) for task in tasks]
        return pd.concat(per_task_reports, axis=0).round(decimals=2)

    # return the actual function for use with .apply()
    return calculate_task_class_metrics


def get_all_class_reports(medic_df: pd.DataFrame, task:str) -> pd.DataFrame:
    """
    Builds one long-format class-report table for a single task: per-class
    metric values over all samples (grouped by split) concatenated with the
    per-sensitive-group reports.

    :param medic_df: dataframe with the ground-truth task column, the matching
        'prediction <task>' column, and 'split' / 'sensitive group' columns.
    :param task: one of the four task keys defined in all_tasks.
    :return: dataframe with columns 'metric', 'sensitive group', 'class' and
        'metric value'.
    """

    # Class labels (classification-report columns) per task; used as the
    # value_vars of the melt at the end.
    task_columns_map = {
        disaster_types_key: ['earthquake', 'fire', 'flood', 'hurricane', 'landslide', 'not_disaster', 'other_disaster'],
        informative_key: ['informative', 'not_informative'],
        humanitarian_key: ['affected_injured_or_dead_people', 'infrastructure_and_utility_damage', 'not_humanitarian', 'rescue_volunteering_or_donation_effort'],
        damage_severity_key: ['little_or_none', 'mild', 'severe']
    }

    # Report over every sample of each split; droplevel drops the 'split'
    # group key, leaving only the report rows (no 'sensitive group' column yet).
    medic_df_class_report_all_samples = medic_df\
        .groupby(['split'], dropna=False)\
        .apply(calculate_task_class_metrics_closure(tasks=[task]))\
        .droplevel(level=0)

    # Report per sensitive group; dropna=False keeps samples without a group,
    # whose NaN group label becomes 'not locatable'.
    # NOTE(review): fillna is applied to the whole frame — presumably only the
    # 'sensitive group' column holds NaN at this point; verify.
    medic_df_class_report_by_sensitive_groups = medic_df\
        .groupby(['sensitive group'], dropna=False)\
        .apply(calculate_task_class_metrics_closure(tasks=[task]))\
        .reset_index(level=0)\
        .fillna('not locatable')

    # The all-samples rows have no 'sensitive group' after the concat;
    # fillna tags them 'all_samples' before melting to long format
    # (one row per metric x sensitive group x class).
    medic_df_class_report_joined = pd.concat([medic_df_class_report_all_samples, medic_df_class_report_by_sensitive_groups])\
        .fillna('all_samples')\
        .melt(
            id_vars=['metric', 'sensitive group'],
            value_vars=task_columns_map[task],
            var_name='class',
            value_name='metric value'
        )

    return medic_df_class_report_joined


def plot_class_scatter_plot(class_report_df: pd.DataFrame, x='support', y='f1-score', color='class', hover_data=None, width=600, height=400, marker_size=12, plot_correlations=True):
    """
    Draws an interactive scatter plot of two class-report metrics against each
    other (default: f1-score over support), one marker per (class,
    sensitive group) combination.

    :param class_report_df: long-format class report as produced by
        get_all_class_reports ('metric', 'sensitive group', 'class',
        'metric value' columns).
    :param x: class-report metric for the x-axis.
    :param y: class-report metric for the y-axis.
    :param color: column used for marker color and symbol.
    :param hover_data: extra columns shown in the tooltip; defaults to
        ['sensitive group'] (None sentinel avoids a mutable default argument).
    :param width: figure width in pixels.
    :param height: figure height in pixels.
    :param marker_size: marker size in pixels.
    :param plot_correlations: unused; kept for backward compatibility —
        correlation reporting lives in collect_all_support_correlations.
    """
    if hover_data is None:
        hover_data = ['sensitive group']

    # Wide format: one column per metric, one row per (class, sensitive group).
    df_for_scatter = class_report_df.pivot(columns=['metric'], index=['class', 'sensitive group'])
    df_for_scatter.columns = df_for_scatter.columns.droplevel(level=0)
    df_for_scatter = df_for_scatter.reset_index(level=[0, 1])

    _fig = px.scatter(
        df_for_scatter,
        x=x,
        y=y,
        hover_data=hover_data,
        color=color,
        symbol=color,
        width=width,
        height=height
    )

    _fig.update_layout(
        font={'size': 8}
    )

    _fig.update_traces(marker_size=marker_size)

    _fig.show()


def collect_all_support_correlations(task_class_reports: List[Tuple[str, pd.DataFrame]]):
    """
    Collects the Pearson correlation between f1-score and support for every
    task (over all classes) and for every single class within each task,
    always restricted to the per-sensitive-group rows.

    :param task_class_reports: (task name, long-format class report) pairs,
        reports as produced by get_all_class_reports.
    :return: dataframe with one row per context ('<task> - all classes' /
        '<task> - <class>') holding the correlation coefficient plus the raw
        f1-score and support value lists it was computed from.
    """
    f1s: List[List[float]] = []
    supports: List[List[int]] = []
    correlations: List[float] = []
    contexts: List[str] = []

    def record(context: str, subset: pd.DataFrame) -> None:
        # Append one result row: corr(f1, support) plus the underlying values.
        corr_coef = subset[['f1-score', 'support']].corr().iloc[0, 1]
        correlations.append(corr_coef)
        contexts.append(context)
        f1s.append(list(subset['f1-score']))
        supports.append(list(subset['support']))

    for task_name, class_report_df in task_class_reports:
        # Wide format: one column per metric, one row per (class, group).
        wide = class_report_df.pivot(columns=['metric'], index=['class', 'sensitive group'])
        wide.columns = wide.columns.droplevel(level=0)
        wide = wide.reset_index(level=[0, 1])

        # 'all_samples' rows aggregate over groups; excluding them keeps the
        # correlation restricted to the per-sensitive-group data points.
        per_group = wide[wide['sensitive group'] != 'all_samples']

        record(f'{task_name} - all classes', per_group)
        for category in wide['class'].unique():
            record(f'{task_name} - {category}', per_group[per_group['class'] == category])

    correlations_df = pd.DataFrame({
        'task (and class)': contexts,
        'correlation f1 to support': correlations,
        'f1-score values (A, B, C, n.l.)': f1s,
        'support values (A, B, C, n.l.)': supports
    })

    return correlations_df
In [11]:
# Partition the predictions by dataset split; the evaluation below uses the
# test split only.
df_medic_train = df_medic[df_medic['split'] == 'train']
df_medic_dev = df_medic[df_medic['split'] == 'dev']
df_medic_test = df_medic[df_medic['split'] == 'test']
print('amount of samples in train/dev/test:', len(df_medic_train), len(df_medic_dev), len(df_medic_test))
amount of samples in train/dev/test: 49353 6157 15688
In [19]:
# One long-format class report per task, computed on the test split
# (all-samples rows plus per-sensitive-group rows).
df_medic_test_disaster_types = get_all_class_reports(df_medic_test, task=disaster_types_key)

df_medic_test_informative = get_all_class_reports(df_medic_test, task=informative_key)

df_medic_test_humanitarian = get_all_class_reports(df_medic_test, task=humanitarian_key)

df_medic_test_damage_severity = get_all_class_reports(df_medic_test, task=damage_severity_key)

Correlations of f1-score to support of every class in every task + of all classes together in a task

In [20]:
# Correlation table over all four tasks; the bare expression renders the
# resulting dataframe as the cell output.
collect_all_support_correlations(
    [
        ('disaster_type', df_medic_test_disaster_types),
        ('informative', df_medic_test_informative),
        ('humanitarian', df_medic_test_humanitarian),
        ('damage_severity', df_medic_test_damage_severity)
    ]
)
Out[20]:
task (and class) correlation f1 to support f1-score values (A, B, C, n.l.) support values (A, B, C, n.l.)
0 disaster_type - all classes 0.584971 [0.35, 0.8, 0.86, 0.57, 0.81, 0.46, 0.39, 0.78... [94.0, 349.0, 929.0, 423.0, 266.0, 7.0, 29.0, ...
1 disaster_type - earthquake 0.794854 [0.35, 0.8, 0.86, 0.57] [94.0, 349.0, 929.0, 423.0]
2 disaster_type - fire 0.930358 [0.81, 0.46, 0.39, 0.78] [266.0, 7.0, 29.0, 388.0]
3 disaster_type - flood 0.890944 [0.78, 0.83, 0.51, 0.78] [480.0, 339.0, 41.0, 455.0]
4 disaster_type - hurricane 0.768808 [0.66, 0.51, 0.27, 0.55] [1066.0, 105.0, 53.0, 294.0]
5 disaster_type - landslide 0.957287 [0.2, 0.33, 0.17, 0.74] [14.0, 4.0, 11.0, 302.0]
6 disaster_type - not_disaster 0.110990 [0.91, 0.9, 0.9, 0.87] [3569.0, 1004.0, 1849.0, 2463.0]
7 disaster_type - other_disaster 0.963903 [0.13, 0.07, 0.21, 0.33] [191.0, 37.0, 238.0, 688.0]
8 informative - all classes -0.160969 [0.82, 0.88, 0.86, 0.87, 0.87, 0.87, 0.85, 0.86] [2234.0, 914.0, 1505.0, 2553.0, 3446.0, 931.0,...
9 informative - informative -0.473840 [0.82, 0.88, 0.86, 0.87] [2234.0, 914.0, 1505.0, 2553.0]
10 informative - not_informative 0.196733 [0.87, 0.87, 0.85, 0.86] [3446.0, 931.0, 1645.0, 2460.0]
11 humanitarian - all classes 0.757010 [0.2, 0.25, 0.59, 0.14, 0.82, 0.81, 0.86, 0.8,... [94.0, 89.0, 361.0, 95.0, 1633.0, 618.0, 976.0...
12 humanitarian - affected_injured_or_dead_people 0.970673 [0.2, 0.25, 0.59, 0.14] [94.0, 89.0, 361.0, 95.0]
13 humanitarian - infrastructure_and_utility_damage -0.409335 [0.82, 0.81, 0.86, 0.8] [1633.0, 618.0, 976.0, 1997.0]
14 humanitarian - not_humanitarian 0.287248 [0.91, 0.89, 0.87, 0.86] [3658.0, 967.0, 1675.0, 2845.0]
15 humanitarian - rescue_volunteering_or_donation... 0.703318 [0.38, 0.45, 0.21, 0.14] [295.0, 171.0, 138.0, 76.0]
16 damage_severity - all classes 0.668863 [0.93, 0.9, 0.92, 0.87, 0.2, 0.22, 0.13, 0.16,... [4080.0, 1115.0, 2008.0, 3049.0, 542.0, 226.0,...
17 damage_severity - little_or_none 0.194649 [0.93, 0.9, 0.92, 0.87] [4080.0, 1115.0, 2008.0, 3049.0]
18 damage_severity - mild 0.087564 [0.2, 0.22, 0.13, 0.16] [542.0, 226.0, 219.0, 540.0]
19 damage_severity - severe -0.457275 [0.7, 0.76, 0.83, 0.71] [1058.0, 504.0, 923.0, 1424.0]

Scatter plots of f1-score to support of every task

disaster_types

In [17]:
# f1-score over support for disaster_types, one marker per class and group
plot_class_scatter_plot(df_medic_test_disaster_types, height=400, width=600)

informative

In [21]:
# f1-score over support for informative, one marker per class and group
plot_class_scatter_plot(df_medic_test_informative, height=400, width=600)

humanitarian

In [27]:
# f1-score over support for humanitarian, one marker per class and group
plot_class_scatter_plot(df_medic_test_humanitarian, height=400, width=600)

damage_severity

In [23]:
# f1-score over support for damage_severity, one marker per class and group
plot_class_scatter_plot(df_medic_test_damage_severity, height=400, width=600)